Note, this is adapted from §6.10 of (Montgomery, 2017).1
You conducted a simulation to assess the impact of three factors, A, B, and C at a low and high level each. You used a \(2^3\) full factorial design. You were able to do three replicates of each treatment combination and conducted the simulation in a random order so that you would meet all assumptions. Your data is contained in montgomery_6_1.csv .
#
myData <- read.csv('montgomery_6_1.csv')
head(myData)
## A B C TC Rep.1 Rep.2 Rep.3
## 1 -1 -1 -1 (1) 22 31 25
## 2 1 -1 -1 a 32 43 29
## 3 -1 1 -1 b 35 34 50
## 4 1 1 -1 ab 55 47 46
## 5 -1 -1 1 c 44 45 38
## 6 1 -1 1 ac 40 37 36
Visualize the data using both boxplots and interaction plots. Interpret your results.
# Box plots
# Note, we need to change the Factors to factors (R data types), but I didn't want to yet
# also we need to format our data nicely
myTempData <- myData %>% mutate(A = as.factor(A), B = as.factor(B), C = as.factor(C)) %>% pivot_longer(cols = c(Rep.1, Rep.2, Rep.3), names_to = 'Rep', values_to = 'Response')
ggpubr::ggarrange(
ggplot(myTempData) +
geom_boxplot(aes(x = A, y = Response )),
ggplot(myTempData) +
geom_boxplot(aes(x = B, y = Response )),
ggplot(myTempData) +
geom_boxplot(aes(x = C, y = Response )),
nrow = 2, ncol = 2,
labels = c('A', 'B', 'C')
)
# It appears that B has an impact; perhaps C, though that is less clear, almost certainly not A
# Interaction plots
ggpubr::ggarrange(
ggplot(myTempData, aes(x = A, y = Response, color = B, group = B)) +
stat_summary(fun = 'mean', geom = 'line') +
stat_summary(fun = 'mean', geom = 'point'),
ggplot(myTempData, aes(x = A, y = Response, color = C, group = C)) +
stat_summary(fun = 'mean', geom = 'line') +
stat_summary(fun = 'mean', geom = 'point'),
ggplot(myTempData, aes(x = B, y = Response, color = C, group = C)) +
stat_summary(fun = 'mean', geom = 'line') +
stat_summary(fun = 'mean', geom = 'point'),
ncol = 2, nrow = 2, labels = c('AB', 'AC', 'BC')
)
# We see a pretty clear interaction between A and C, perhaps a bit less so between B and C, and not really between A and B
Estimate the effects using contrasts.
myTempData <- myData %>%
mutate(AB = A*B, AC = A*C, BC = B*C, ABC = A*B*C, Response.Sum = Rep.1 + Rep.2 + Rep.3)
Effect.A <- sum(myTempData$A * myTempData$Response.Sum) / (3*2^(3-1))
Effect.B <- sum(myTempData$B * myTempData$Response.Sum) / (3*2^(3-1))
Effect.C <- sum(myTempData$C * myTempData$Response.Sum) / (3*2^(3-1))
Effect.AB <- sum(myTempData$AB * myTempData$Response.Sum) / (3*2^(3-1))
Effect.AC <- sum(myTempData$AC * myTempData$Response.Sum) / (3*2^(3-1))
Effect.BC <- sum(myTempData$BC * myTempData$Response.Sum) / (3*2^(3-1))
Effect.ABC <- sum(myTempData$ABC * myTempData$Response.Sum) / (3*2^(3-1))
data.frame(Factor = c('A', 'B', 'C', 'AB', 'AC', 'BC', 'ABC'),
Effect = c(Effect.A, Effect.B, Effect.C, Effect.AB, Effect.AC, Effect.BC, Effect.ABC))
## Factor Effect
## 1 A 0.3333333
## 2 B 11.3333333
## 3 C 6.8333333
## 4 AB -1.6666667
## 5 AC -8.8333333
## 6 BC -2.8333333
## 7 ABC -2.1666667
# We see that the effect of B appears to be the largest and clearly non-zero; B additionally
# We see our interactions of AC is significant, with perhaps a 2 way interactino of BC and 3-way with ABC
Conduct a 3-factor ANOVA for the data set and assess your results.
# Format and select data usefully
myTempData <- myData %>%
mutate(A = as.factor(A), B = as.factor(B), C = as.factor(C)) %>%
pivot_longer(cols = c(Rep.1, Rep.2, Rep.3), names_to = 'Rep', values_to = 'Response') %>%
select(-c(TC, Rep))
# Conduct the ANOVA
myAOV <- aov(Response ~ (.)^3, data = myTempData)
# View the response
summary(myAOV)
## Df Sum Sq Mean Sq F value Pr(>F)
## A 1 0.7 0.7 0.022 0.883680
## B 1 770.7 770.7 25.547 0.000117 ***
## C 1 280.2 280.2 9.287 0.007679 **
## A:B 1 16.7 16.7 0.552 0.468078
## A:C 1 468.2 468.2 15.519 0.001172 **
## B:C 1 48.2 48.2 1.597 0.224475
## A:B:C 1 28.2 28.2 0.934 0.348282
## Residuals 16 482.7 30.2
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# This confirms our graphical and effects analysis that B, C, and AC are all significant at alpha = .01
# Plot for assumptions
par(mfrow = c(2,2))
plot(myAOV)
# We appear to meet assumptions
shapiro.test(myAOV$residuals)
##
## Shapiro-Wilk normality test
##
## data: myAOV$residuals
## W = 0.92106, p-value = 0.06166
bartlett.test(Response ~ TC, data = myData %>% pivot_longer(cols = c(Rep.1, Rep.2, Rep.3), names_to = 'Rep', values_to = 'Response'))
##
## Bartlett test of homogeneity of variances
##
## data: Response by TC
## Bartlett's K-squared = 4.0801, df = 7, p-value = 0.7705
Visualize this design in three dimensions.
myTempData <- myData %>%
mutate(A = as.factor(A), B = as.factor(B), C = as.factor(C)) %>%
pivot_longer(cols = c(Rep.1, Rep.2, Rep.3), names_to = 'Rep', values_to = 'Response')
# ggplot doesn't, insofar as i know have a good 3D plotting capability
# plotly is a nice library for this though
# https://plotly-r.com/d-charts.html has a good example for how to do 3D plots
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# We can look at the design geometrically
plot_ly(myTempData, x = ~A, y = ~B, z = ~C) %>%
add_text(text = ~TC) %>%
add_markers()
myZ.CPos <- as.matrix(
myTempData %>%
filter(C == 1) %>%
group_by(A, B) %>%
summarize(Mean = mean(Response)) %>%
pivot_wider(A, names_from = B, values_from = Mean) %>%
ungroup() %>%
select(-A)
)
myZ.CNeg <- as.matrix(
myTempData %>%
filter(C == -1) %>%
group_by(A, B) %>%
summarize(Mean = mean(Response)) %>%
pivot_wider(A, names_from = B, values_from = Mean) %>%
ungroup() %>%
select(-A)
)
# We can also view a three D surface with interactions, but these get really hard
# to interpret, so this is more for fun.
# I'm not good at plotly so there's absolutely a better way to do this
# when C is high we see the surface in red
# When C is low we see the surface in blue
plot_ly() %>%
add_surface(x = c(-1, 1), y = c(-1, 1), z = ~myZ.CPos, colorscale = list(c(0, 1), c('rgb(255, 0, 0', 'rgb(255, 0, 0)'))) %>%
add_surface(x = c(-1, 1), y = c(-1, 1), z = ~myZ.CNeg, colorscale = list(c(0, 1), c('rgb(0, 0, 255', 'rgb(0, 0, 255)'))) %>%
layout(scene = list(
xaxis = list(title = 'A'),
yaxis = list(title = 'B'),
zaxis = list(title = 'Mean Response')
))
For a 6 factor, full factorial (\(2^6\)) design, with factors A, B, C, D, E, F and levels -1 and 1:
# there are 2^6
2^6
## [1] 64
Recall that \(\sum_{k = 0}^{n} \binom{n}{k} = 2^n\). But we will not have a “no factor effect” so we can subtract one
# The toal number of factors and interaction effects is the sum of 6 choose k where n ranges from 1 to 6.
2^6 - 1
## [1] 63
# There are 63
# We can confirm this manually if desired
choose(6, 1) + choose(6, 2) + choose(6, 3) + choose(6, 4) + choose(6, 5) + choose(6, 6)
## [1] 63
(1)abcdef(1): A = B = C = D = E = F = -1
ab: A = B = 1, C = D = E = F = -1
c: C = 1, A = B = D = E = F = -1
def: A = B = C = -1, D = E = F = 1
ABC*BCD = A(B^2)CD = ACD
AB*ACE*ADEF = (A^3)BCD(E^2)F = ABCDF
Note, this problem was initally a question about a machining tool, but the analysis is equally valid for simulation results↩